from pyspark import SparkContext

sc = SparkContext(appName="test")

# Load the raw student records, one comma-separated line per student.
rdd = sc.textFile("/data/student/students.txt")

# Key each record by its fifth field (index 4), count the occurrences per key,
# and write the result as tab-separated "value<TAB>count" lines.
# Note: no .encode("utf-8") on the key here; under Python 3 that would format
# the value as a bytes literal (b'...') in the output.
rdd.map(lambda line: (line.split(",")[4], 1)) \
    .reduceByKey(lambda a, b: a + b) \
    .map(lambda t: "%s\t%d" % (t[0], t[1])) \
    .saveAsTextFile("/data/count")

# Release cluster resources once the job has finished.
sc.stop()
